#include "CComp.h"
#include "TSpell.h"
#include "UError.h"

#define FIRSTMASK  0xF   // 15: keeps the low 4 bits (low nibble) of a byte
#define SECONDMASK 0x10  // 16 (not referenced in the visible part of this file)
#define EOW     (0)	// nibble value that ends a word; also stored as the terminator of a decoded word
#define ESCAPE  (1)	// nibble value; each one in front of a character selects the next row of CharacterArray

// Status codes returned by GetWord and the load/save routines; only defined
// here when nothing included earlier has defined them.
#ifndef NOTOK
#define NOTOK  (0)
#endif

#ifndef OK
#define OK     (1)
#endif


// Constructs a compressor that has no count table allocated yet.
CCompress::CCompress()
{
	CountArray = NULL;	// marks the table as "not allocated"
}


// Releases the count table, if one was ever allocated.
CCompress::~CCompress()
{
	// free() is defined to do nothing for a null pointer, so no explicit
	// NULL test is needed here.
	free(CountArray);
}




/*****************************************************************************
  GetValue returns the 4 bit value (0-15) stored in buff at position *offset,
  where the position is counted in nibbles.  Two nibbles share each byte: an
  even position is the high half of the byte, an odd position the low half.
  *offset itself is not modified.
****************************************************************************/

inline short CCompress::GetValue(UInt8 *buff, short *offset)
{
	const short nibbleIndex = *offset;
	const UInt8 packed = buff[nibbleIndex >> 1];	// byte holding this nibble

	// odd position -> low four bits, even position -> high four bits
	return ((nibbleIndex % 2) ? (packed & FIRSTMASK) : (packed >> 4));
}

/****************************************************************************
  PutValue stores the 4 bit value ch (0-15) in buff at position *offset, where
  the position is counted in nibbles.  An even position is the high half of
  the byte, an odd position the low half; the other half of the byte is left
  untouched.  *offset itself is not modified.

  Only the low four bits of ch are used.  Without that mask a value above 15
  written to an odd position would spill into the neighbouring high nibble.
***************************************************************************/

inline void CCompress::PutValue(UInt8 *buff, short *offset, UInt8 ch)
{
	UInt8 &cell = buff[(*offset) >> 1];					// byte holding this nibble
	const UInt8 nibble = (UInt8)(ch & FIRSTMASK);		// never touch the other half

	if ((*offset) % 2) {
		// odd position: keep the high half, replace the low half
		cell = (UInt8)((cell & 0xF0) | nibble);
	} else {
		// even position: keep the low half, replace the high half
		cell = (UInt8)((nibble << 4) | (cell & FIRSTMASK));
	}
}

/****************************************************************************
  GetChar decodes one character from buff starting at nibble position *offset
  and returns it, or returns EOW when the nibble read is the end-of-word
  marker.  Every leading ESCAPE nibble selects the next row of CharacterArray;
  the first non-ESCAPE nibble, less 2 (values 0 and 1 are reserved for EOW
  and ESCAPE), selects the column.
  Note: *offset is advanced past every nibble consumed, which may be more
  than one.
  NOTE(review): nothing here limits the number of consecutive ESCAPE nibbles,
  so the row index is only as trustworthy as the buffer contents.
***************************************************************************/

short CCompress::GetChar(UInt8 *buff, short *offset)
{
	short escapes = 0;			// ESCAPE nibbles seen so far == table row
	short nibble;

	for (;;) {
		nibble = (unsigned char) GetValue(buff, offset);
		(*offset)++;			// every nibble read is consumed
		if (nibble != ESCAPE)
			break;
		escapes++;
	}

	if (nibble == EOW)
		return EOW;

	// subtract 2 to make up for the reserved EOW and ESCAPE values
	return ((short)CharacterArray[escapes][nibble - 2]);
}

/****************************************************************************
  PutChar encodes the character ch into buff at nibble position *offset.  The
  encoding is CharacterMap[ch].Shift ESCAPE nibbles followed by one nibble
  holding CharacterMap[ch].Offset + 2 (the +2 skips the reserved EOW and
  ESCAPE values).  *offset is advanced past every nibble written, so it can
  move by more than one.
****************************************************************************/

void CCompress::PutChar(UInt8 *buff, short *offset, UInt8 ch)
{
	const short escapeCount = CharacterMap[ch].Shift;
	const short column = CharacterMap[ch].Offset;
	short remaining;

	// one ESCAPE nibble per shift level
	for (remaining = escapeCount; remaining > 0; remaining--) {
		PutValue(buff, offset, ESCAPE);
		++(*offset);
	}

	// then the nibble that names the character within its row
	PutValue(buff, offset, column + 2);
	++(*offset);
}

/****************************************************************************
  GetRedundant reads the nibble in buff at position *offset, advances *offset
  by one and returns the value read.  The value is the number of leading
  characters the next word shares with the previous word.
***************************************************************************/

inline short CCompress::GetRedundant(UInt8 *buff, short *offset)
{
	const short redundant = GetValue(buff, offset);

	++(*offset);
	return redundant;
}

/****************************************************************************
  PutRedundant writes redundant (the number of leading characters, up to 15,
  that the next word shares with the previous word) into buff at nibble
  position *offset and advances *offset by one.  No range checking is done
  here.
***************************************************************************/

inline void CCompress::PutRedundant(UInt8 *buff, short *offset, UInt8 redundant)
{
	PutValue(buff, offset, redundant);
	++(*offset);
}

/***************************************************************************
  PutEOW writes the reserved end-of-word nibble (EOW) into buff at nibble
  position *offset and advances *offset by one.
**************************************************************************/

inline void CCompress::PutEOW(UInt8 *buff, short *offset)
{
	PutValue(buff, offset, EOW);
	++(*offset);
}

/****************************************************************************
  GetWord decodes the next word from buff, starting at nibble position
  *offset, into Word.  Word is expected to still hold the previous word: the
  leading redundancy nibble says how many of its characters are kept, and the
  decoded characters are written after them.  *offset is left just past the
  word's EOW nibble.
  Returns NOTOK when the entry is empty (no kept characters and nothing
  decoded), which marks the end of a series; in that case Word is not
  terminated.  Otherwise Word is terminated and OK is returned.
  Word must be large enough for the decoded word; no bound is checked here.
***************************************************************************/

short CCompress::GetWord(UInt8 *Word, UInt8 *buff, short *offset)
{
	short writePos = GetRedundant(buff, offset);	// characters kept from the previous word
	short decoded;

	for (decoded = GetChar(buff, offset); decoded != EOW; decoded = GetChar(buff, offset))
		Word[writePos++] = (unsigned char) decoded;

	if (writePos == 0)
		return (NOTOK);		// there wasn't a word here / end of series

	Word[writePos] = EOW;
	return (OK);
}


/****************************************************************************
  PutWord writes one compressed entry into buff at nibble position *offset:
  the redundancy nibble, then the first length characters of Word encoded
  with PutChar, then the EOW nibble.  *offset is advanced past everything
  written.
***************************************************************************/

void CCompress::PutWord(UInt8 *buff, short *offset, UInt8 *Word, short length, UInt8 redundant)
{
	short index = 0;

	PutRedundant(buff, offset, redundant);

	while (index < length) {
		PutChar(buff, offset, Word[index]);
		index++;
	}

	PutEOW(buff, offset);
}


/****************************************************************************
  Same returns the number of leading characters Word1 and Word2 have in
  common, capped at 15 (the largest count that fits in one nibble).  Both
  arguments are zero terminated strings.

  The comparison stops at the terminator.  Without that check two identical
  words compare equal at their terminators as well, and the scan runs on
  past the end of both buffers.
***************************************************************************/

short CCompress::Same(UInt8 *Word1, UInt8 *Word2)
{
	short counter = 0;

	while ((counter < 15) &&
		   (Word1[counter] != 0) &&
		   (Word1[counter] == Word2[counter]))
		counter++;

	return (counter);
}


/****************************************************************************
  PrefixCompress writes CurrWord into buff at nibble position *offset, leaving
  out the leading characters it shares with LastWord (at most 15) and
  recording that count in the entry instead.  Returns the number of shared
  characters that were dropped.
***************************************************************************/

short CCompress::PrefixCompress(UInt8 *buff, short *offset, UInt8 *LastWord, UInt8 *CurrWord)
{
	UInt8 shared = (unsigned char) Same(LastWord, CurrWord);

	// the count has to fit in a single nibble
	if (shared > 15)
		shared = 15;

	UInt8 *suffix = CurrWord + shared;		// first character that differs

	PutWord(buff, offset, suffix, (strlen((char *)CurrWord) - shared), shared);
	return (shared);
}


/****************************************************************************
  CompressedWordLength returns how many nibbles CurrWord occupies when it is
  prefix-compressed against LastWord: one nibble for the redundancy count,
  one for EOW, and for every character after the shared prefix its ESCAPE
  nibbles (CharacterMap[].Shift) plus one nibble for the character itself.
***************************************************************************/

short CCompress::CompressedWordLength(UInt8 *LastWord, UInt8 *CurrWord)
{
	UInt8 shared = (unsigned char) Same(LastWord, CurrWord);

	if (shared > 15)
		shared = 15;

	const short wordLen = strlen((char *)CurrWord);
	short nibbles = 2;						// redundancy nibble + EOW nibble
	short pos;

	for (pos = shared; pos < wordLen; pos++) {
		// Shift escape nibbles plus the one nibble carrying the offset
		nibbles += (CharacterMap[CurrWord[pos]].Shift + 1);
	}

	return nibbles;
}


/****************************************************************************
  PutEndOfSequence writes the end-of-series marker into buff at nibble
  position *offset: an entry with a redundancy count of 0 and no characters,
  i.e. a 0 nibble followed by an EOW nibble.  GetWord reports NOTOK for such
  an entry.  *offset is advanced by two.
***************************************************************************/

void CCompress::PutEndOfSequence(UInt8 *buff, short *offset)
{
	PutRedundant(buff, offset, 0);		// nothing shared with the previous word
	PutEOW(buff, offset);				// and no characters of its own
}

/****************************************************************************
  ResetReader rewinds to the start of a buffer: the nibble position *offset
  is set back to 0 and the first byte of buff is cleared.  It is used, for
  instance, when beginning to read a new compressed buffer.
***************************************************************************/

void CCompress::ResetReader(UInt8 *buff, short *offset)
{
	*offset = 0;
	buff[0] = 0;
}

/*

  void
  CCompress :: SetCharacterArray()
  {
  short Counter;

  for(Counter = 0; Counter < 256; Counter++)
  CharacterArray[0][Counter] = CountArray[Counter].Character;
  }
*/

/*************************************************************************
 * LoadCharacterArray() positions Infile at Offset (from the start of the
 * file) and reads 256 bytes into CharacterArray.  Failures are detected
 * by polling ErrorFunc(0, GET) after each step.
 * Returns OK on success, otherwise the value reported by
 * ErrorFunc(0, GET).
 ************************************************************************/
long CCompress::LoadCharacterArray(Uio *Infile, long Offset)
{
#ifdef TRACE
	printf("\nCCompress :: LoadCharacterArray()");
#endif

	Infile->SetPos(Offset, SEEK_SET);
	if (!(ErrorFunc(0, GET) < eNo_Err)) {
		// positioned without error: read the table
		Infile->ReadData(CharacterArray, 256);
		if (!(ErrorFunc(0, GET) < eNo_Err))
			return OK;
	}

	// one of the two steps failed: hand the recorded error back
	return (ErrorFunc(0, GET));
}

/*
  void
  CCompress :: SetCharacterMap(MapStruct *CharMap)
  {
  memcpy(CharacterMap,CharMap,256);
  }
*/

/*
  // don't really need this function for the time being.
  short
  CCompress :: LoadCharacterMap(Uio *Infile, short Offset)
  {
  if(fseek(Infile,Offset,SEEK_SET) != 0) return ERROR;
  if(fread(CharacterMap,sizeof(MapStruct) * 256,1,Infile) != 1) return ERROR;
  return OK;
  }
*/

/************************************************************************
 * CountCharacters() Counts the number of characters in a file ignoring
 * newline characters and line feeds ("\n" & "\r").  The counts from this
 * are to be used to generate the compression algorithms for the wordlists.
 * Infile must be a pointer to a currently open file and CountArray is a
 * pointer to an array of CountStructs which is 256 in length.  The
 * respective character is placed in the CountStruct in the Character slot
 * as well as a respective count.
************************************************************************/
/*
long CCompress::CountCharacters(Uio *Infile)
{
	long Counter;
	long TotalCharacters = 0;
	unsigned char Word[MAXLINELENGTH];		// arbitrarily long.  Must be longer than a line length

#ifdef TRACE
	printf("\nCCompress :: CountCharacters()");
#endif

	if (CountArray == NULL)
		CountArray = (CountStruct *)malloc(sizeof(CountStruct) * 256);

	if (CountArray == NULL) {
		ErrorFunc(eNo_Mem, SET);
		return 0;
	}


	Infile->SetPos(0, SEEK_SET);
	if (ErrorFunc(0, GET) < eNo_Err) {
		ErrorFunc(eNo_Mem, SET);
		return 0;
	}


	memset(CountArray, 0, sizeof(CountStruct) * 256);

	// Set Character in struct to its character value
	for (Counter = 0; Counter < 256; Counter++)
		CountArray[Counter].Character = (unsigned char) 0xFF & Counter; // AP 6/11/95

	// read in all the strings
	while (Infile->getstring(Word, MAXLINELENGTH) != NULL) {
		// check to make sure the line isn't a comment
		// all comment lines start with a "#" which allows
		// copyright info etc. to be stuffed in the wordlist.

		if (Word[0] != '#') {
			for (Counter = 0; Word[Counter] != 0; Counter++) {
				if ((Word[Counter] == '\n') || (Word[Counter] == '\r')) {
					Word[Counter] = 0;
					break;
				}
			}

			for (Counter = 0; Word[Counter] != 0; Counter++) {
				CountArray[Word[Counter]].Count++;
				TotalCharacters++;
			}
		}
	}

	if (ErrorFunc(0, GET) < eNo_Err)
		return 0;

	Infile->SetPos(0, SEEK_SET);
	MakeCharacterArray();
	MakeMapArray();
	return TotalCharacters;
}


long CCompress::CountCharacters(FILE *Infile)
{
	long Counter;
	long TotalCharacters = 0;
	unsigned char Word[MAXLINELENGTH];		// arbitrarily long.  Must be longer than a line length

#ifdef TRACE
	printf("\nCCompress :: CountCharacters()");
#endif

	if (CountArray == NULL)
		CountArray = (CountStruct *)malloc(sizeof(CountStruct) * 256);

	if (CountArray == NULL) {
		ErrorFunc(eNo_Mem, SET);
		return 0;
	}


	if(fseek(Infile,0,SEEK_SET) != 0) {
		return ERROR;
	}

	//Infile->SetPos(0, SEEK_SET);
	//if (ErrorFunc(0, GET) < eNo_Err) {
	//	ErrorFunc(eNo_Mem, SET);
	//	return 0;
	//}

	memset(CountArray, 0, sizeof(CountStruct) * 256);

	// Set Character in struct to its character value
	for (Counter = 0; Counter < 256; Counter++)
		CountArray[Counter].Character = (unsigned char) 0xFF & Counter; // AP 6/11/95

	// read in all the strings
	while (fgets(Word,MAXLINELENGTH,Infile)) {
	//while (Infile->getstring(Word, MAXLINELENGTH) != NULL) {
		// check to make sure the line isn't a comment
		// all comment lines start with a "#" which allows
		// copyright info etc. to be stuffed in the wordlist.

		if (Word[0] != '#') {
			for (Counter = 0; Word[Counter] != 0; Counter++) {
				if ((Word[Counter] == '\n') || (Word[Counter] == '\r')) {
					Word[Counter] = 0;
					break;
				}
			}

			for (Counter = 0; Word[Counter] != 0; Counter++) {
				CountArray[Word[Counter]].Count++;
				TotalCharacters++;
			}
		}
	}

	if (ErrorFunc(0, GET) < eNo_Err)
		return 0;

	if(fseek(Infile,0,SEEK_SET) != 0)
		return ERROR;
	//Infile->SetPos(0, SEEK_SET);
	MakeCharacterArray();
	MakeMapArray();
	return TotalCharacters;
}
*/


/************************************************************************
 * SortByCountCompare() is the comparison routine which is called by
 * SortByCount().  It isn't part of the class because, it has all sorts
 * of conflicts when it is.  Better to just leave it separate.  The
 * routine is such that it should fix everything so that the characters
 * are in descending order by count.
***********************************************************************/
/*
int SortByCountCompare(const void *Key1, const void *Key2)
{
	CountStruct * Struct1;
	CountStruct * Struct2;

	Struct1 = (CountStruct *)Key1;
	Struct2 = (CountStruct *)Key2;

	if (Struct1->Count < Struct2->Count)
		return 1;
	if (Struct1->Count > Struct2->Count)
		return -1;
	return 0;
}
*/

/*************************************************************************
 * SortByCount() Sorts a CountArray by the Count.  The characters for each
 * count can be found by the Character field of the struct.
************************************************************************/
/*
void CCompress::SortByCount()
{
#ifdef TRACE
	printf("\nCCompress :: SortByCount()");
#endif

	qsort((void *) CountArray, 256, sizeof(CountStruct), SortByCountCompare);
}
*/


/*************************************************************************
 * MakeMapArray() sets the values in MapStruct so that they map the various
 * characters into Shift / Offset sequences.  The Shift / Offset sequences
 * can then be used to output the nibbles for the compressed dictionary.
************************************************************************/

/*
void CCompress::MakeMapArray()
{
	char Offset;
	char Shift;
	short Counter;

#ifdef TRACE
	printf("\nCCompress :: MakeMapArray()");
#endif

	memset(CharacterMap, 0, sizeof(MapStruct) * 256);
	SortByCount();
	Offset = 0;
	Shift = 0;

	for (Counter = 0; CountArray[Counter].Count != 0; Counter++) {
		// printf("\nCounter = %d  Offset = %d  Shift = %d",Counter,Offset,Shift);

		CharacterMap[(unsigned char)CountArray[Counter].Character].Offset = Offset;
		CharacterMap[(unsigned char)CountArray[Counter].Character].Shift = Shift;
		Offset++;
		if (Offset == 14) {
			// max number of characters in one series
			Shift++;
			Offset = 0;
		}
		// endif
	}
}


void CCompress::MakeCharacterArray()
{
	short Counter;

#ifdef TRACE
	printf("\nCCompress :: MakeCharacterArray()");
#endif

	memset(CharacterArray, 0, 256);
	SortByCount();
	for (Counter = 0; Counter < 256; Counter++)
		CharacterArray[0][Counter] = CountArray[Counter].Character;
}

*/
/*************************************************************************
 * SaveCharacterArray() writes out the character array which is needed
 * for decoding the compressed dictionary pages.
 * OutFile is positioned at Offset (from the start of the file) and 256
 * bytes of CharacterArray are written there.  Failures are detected by
 * polling ErrorFunc(0, GET) after each step.
 * Returns OK on success, otherwise the value reported by
 * ErrorFunc(0, GET).
 ************************************************************************/


long CCompress::SaveCharacterArray(Uio *OutFile, long Offset)
{
#ifdef TRACE
	printf("\nCCompress :: SaveCharacterArray()");
#endif

	OutFile->SetPos(Offset, SEEK_SET);
	if (!(ErrorFunc(0, GET) < eNo_Err)) {
		// positioned without error: write the table
		OutFile->WriteData(CharacterArray, 256);
		if (!(ErrorFunc(0, GET) < eNo_Err))
			return OK;
	}

	// one of the two steps failed: hand the recorded error back
	return (ErrorFunc(0, GET));
}

/*************************************************************************
 * SaveCharacterArray() (stdio flavour) seeks OutFile to Offset from the
 * start of the file and writes 256 bytes of CharacterArray there.
 * Returns OK when both the seek and the complete write succeed, and
 * ERROR otherwise.
 ************************************************************************/
long CCompress::SaveCharacterArray(FILE *OutFile, long Offset)
{
#ifdef TRACE
	printf("\nCCompress :: SaveCharacterArray()");
#endif

	// the write is only attempted when the seek succeeded
	if ((fseek(OutFile, Offset, SEEK_SET) == 0) &&
		(fwrite(CharacterArray, 1, 256, OutFile) >= 256))
		return OK;

	return ERROR;
}


#ifdef TESTCCOMPRESS
/*************************************************************************
 * PrintCountArray() is a debugging aid (built only with TESTCCOMPRESS).
 * It sorts the count table with SortByCount() and prints the entries up
 * to the one whose Character is 0.  Lowercase letters are printed
 * literally, every other entry with a blank in place of the character.
 * Nothing is printed when no count table has been allocated.
 * NOTE(review): the loop ends on Character == 0, not on Count == 0 as
 * MakeMapArray() does; confirm which of the two was intended.
 ************************************************************************/
void CCompress::PrintCountArray()
{
	short Counter;

#ifdef TRACE
	printf("\nCCompress :: PrintCountArray()");
#endif

	// CountArray stays NULL until something allocates it; sorting or
	// reading it in that state would dereference a null pointer.
	if (CountArray == NULL)
		return;

	SortByCount();

	// The table has 256 entries; never walk past the end even if no entry
	// with Character == 0 is found.
	for (Counter = 0; (Counter < 256) && (CountArray[Counter].Character != 0); Counter++) {
		if ((CountArray[Counter].Character >= 'a') && (CountArray[Counter].Character <= 'z'))
			printf("\n Character: %c  Count: %ld", CountArray[Counter].Character, CountArray[Counter].Count);
		else
			printf("\n Character:    Count: %ld", CountArray[Counter].Count);
	}
}


/*************************************************************************
 * PrintMapArray() is a debugging aid (built only with TESTCCOMPRESS).
 * It prints the Offset and Shift stored in CharacterMap for each of the
 * 256 character values.  Lowercase letters are shown literally, every
 * other value with a blank in place of the character.
 ************************************************************************/
void CCompress::PrintMapArray()
{
	short Value;

#ifdef TRACE
	printf("\nCCompress :: PrintMapArray()");
#endif

	for (Value = 0; Value < 256; Value++) {
		if ((Value < 'a') || (Value > 'z')) {
			// not a lowercase letter: leave the character column blank
			printf("\n Character :    Value : %d  Offset: %d  Shift : %d",
				   Value, CharacterMap[Value].Offset, CharacterMap[Value].Shift);
			continue;
		}

		printf("\n Character : %c  Value : %d  Offset: %d  Shift : %d",
			   Value, Value, CharacterMap[Value].Offset, CharacterMap[Value].Shift);
	}
}

#endif




